source('00_util_scripts/mod_bplot.R')

# Column names applied to every per-metric result file. They are passed as
# `col_names`, so the first row of each file is read as data, not as a header.
pepcol <- c('id','score','seq')

# Read every file in the results directory into one long table. `id = 'file'`
# records the source path of each row; the metric name is then taken from the
# file's base name (the part before '.txt'), and the path column is dropped by
# `.keep = 'unused'`.
# The base name is used rather than matching on the directory part of the path,
# so the extraction does not depend on how list.files() joins directory and
# file name (e.g. the doubled '/' produced by the trailing slash below).
pep_raw <- list.files('~/append-ssd/data_lfs/results/', full.names = TRUE) |>
  read_delim(col_names = pepcol, id = 'file') |>
  mutate(metric = str_extract(basename(file), '.+(?=\\.txt)'), .keep = 'unused')

# Reshape to one row per (id, seq) with one score column per metric.
pep_raw <- pep_raw |>
  pivot_wider(names_from = metric, values_from = score)

# Exploratory preview (result is not stored): keep peptides passing the score
# thresholds with interface_hit above 2, rank them on each dG column, and show
# the 1000 rows with the lowest summed rank (ties at the cut-off are kept).
pep_raw |>
  filter(foldx_dG < -5 & pyrosetta_dG < -10 & interface_hit > 2) |>
  mutate(
    rank_foldx = rank(foldx_dG),
    rank_pyrosetta = rank(pyrosetta_dG)
  ) |>
  # NOTE: despite its name, rank_mean here is the plain sum of the two ranks.
  mutate(rank_mean = rank_foldx + rank_pyrosetta) |>
  slice_min(order_by = rank_mean, n = 1000)

# Working set used by the plots below: same dG thresholds as the preview, but
# with the stricter interface_hit > 4 cut-off.
pep_filter <- pep_raw |>
  filter(foldx_dG < -5 & pyrosetta_dG < -10 & interface_hit > 4) |>
  mutate(
    rank_foldx = rank(foldx_dG),
    rank_pyrosetta = rank(pyrosetta_dG)
  ) |>
  # Combined rank: the rank of the summed per-metric ranks (1 = best overall).
  mutate(rank_mean = rank(rank_foldx + rank_pyrosetta))

# Scatter of the filtered peptides. Both dG columns are negated so that more
# negative scores plot further right / higher up. Points are coloured by
# whether their combined rank is below 100 and sized by interface_hit.
ggplot(
  data = pep_filter,
  mapping = aes(x = -foldx_dG, y = -pyrosetta_dG, color = rank_mean < 100)
) +
  geom_point(mapping = aes(size = interface_hit), alpha = 0.2) +
  theme_bw() +
  labs(title = 'Pepmimic generated 100k peptides mimic anti-CD19 Ab')

# Load the final candidate list (no header row; columns named explicitly) and
# derive `id` as the part of `info` before the first ';'.
# The pattern uses ';.*' rather than ';.+' so that a value ending in a bare ';'
# is also stripped; otherwise the leftover ';' would prevent the id from
# matching the ids in the score tables.
pep_final <-
  read_delim('~/append-ssd/data_lfs/final_output/final_candidates.txt',
             col_names = c('seq','interface_hit','info')) |>
  mutate(id = str_remove(info, ';.*'))

# Same scatter as for the filtered set, restricted to peptides whose id appears
# in the final candidate list. Axes are the negated dG columns; point size
# encodes interface_hit.
ggplot(
  data = pep_filter |> filter(id %in% pep_final[['id']]),
  mapping = aes(x = -foldx_dG, y = -pyrosetta_dG)
) +
  geom_point(mapping = aes(size = interface_hit), alpha = 0.2) +
  theme_bw() +
  labs(title = 'Pepmimic generated 100k peptides mimic anti-CD19 Ab')
